#### How to use this R-script ####

#' You're going to load in one data file with all the data for the analysis
#' Then you're going to run regressions to make tables and figures
#' And reshape some data to make figures
#' There are two tables:
#' Table 2 is an object called 'table_2_latex' 
#' Table 3 is an object called 'table_3_latex' 
#' -- The outputs here are copied into the manuscript in Latex 
#' and later converted to Word, but you can see them in R by calling the object
#' There are five figures:
#' These are named: figure_1, figure_2, etc.
#' Some of these are two plots combined into one:
#' e.g., 'figure_1' is a single plot that has figure_1a and figure_1b in it
#' These are saved as EPS 
#' Note: The final robustness test loads in new data from 'idealpoint_chains.csv' 
#' This is a large file and kind of cumbersome to work with, so is only loaded in at the end
#' You should be able to run the whole script from top to bottom
#' And then call the following to check the tables and figures:
#' table_2_latex
#' table_3_latex
#' figure_1
#' figure_2
#' figure_3
#' figure_4
#' figure_5

#### Initialize ####

library(Amelia) # Working with imputed data
library(sampleSelection) # Heckman models
library(sandwich) # Robust standard errors
library(lmtest) # Robust standard errors
library(tidyverse) # Data wrangling
library(magrittr) # More pipe options: %$%
library(stargazer) # Output tables
library(gridExtra) # Combining ggplots into one plot

## Set a working directory 

# setwd("rowan_isq_2020_replication")

#### Load in the replication data and clean it ####

load("rowan_isq_2020_replication_data.Rdata") # A list containing some data

# This is an amelia object of 15 imputed datasets for analysis
imputed_data <- data_list[1]

# These data.frames are used for figures later
gra_preds <- as.data.frame(data_list[13:20])
cslf_preds <- as.data.frame(data_list[21:28])
gcg <- as.data.frame(data_list[29:34])

## Recode some variables and nest the imputed data into imputation-specific rows

all_imputations <- bind_rows(unclass(imputed_data$imputations), .id = "m") %>%
  # Recode some variables
  mutate(sum_first = sum_climate_memberships, 
         share_first = share_climate_memberships,
         d1_first = d1_mean, 
         d2_first = d2_mean,
         fossil_rents = log(1+fossil_rents),
         ghg_ex = log(ghg_ex),
         gdp_mkt = log(gdp_mkt),
         gdp_pcmkt = log(gdp_pcmkt),
         population = log(population),
         vulnerability = 100*vulnerability,
         indcshare_gdr_topcoded = ifelse(.$indcshare_gdr < -75, -75, # Roughly 5th and 95th percentiles
                                         ifelse(.$indcshare_gdr > 300, 300, .$indcshare_gdr)),
         d1_mean_first = d1_mean, 
         d2_mean_first = d2_mean,
         fh_ipolity2_first = fh_ipolity2,
         epi_first = epi,
         elec_renew_first = elec_renew,
         vulnerability_first = vulnerability,
         ngo_number_first = ngo_number,
         sum_ieas_first = sum_ieas,
         fossil_rents_first = fossil_rents) %>% 
  # Before grouping/nesting, filter out unneccessary years and Paraguay
  filter(year == 2007, # 2007 has the best coverage for missing data
         iso3c != "PRY") %>% # Paraguay has incredibly bad skewed GHG target
  # Group by their imputations
  group_by(m) %>%
  # Nest the imputed dataset 
  nest()

## Not run: save this tidied imputed data as its own file for retrieval 
# save(all_imputations, file = "data/output/cleaned_imputed_data.Rdata")

#### Regression analysis ####

#' Code to make table 2 based on three separate regression models
#' Table 2 is an object called: table_2_latex
#' #' Code to make table 3 based on three separate regression models
#' Table 3 coefficients and standard errors are melded to adjust for the multiple imputation
#' Table 3 is an object called: table_3_latex

#### Table 2 ####

# Table 2 has no control variables, so create a new data.frame with just the key regressors
df_nocontrols <- all_imputations %>%
  filter(m == "imp1") %>% 
  unnest(cols = c(data)) %>%
  select(iso3c, indcshare_median, ambitionseen, copenhagen_ghgtarget,
         sum_climate_memberships, sum_first, 
         share_climate_memberships, share_first, 
         d1_mean, d1_first,
         d2_mean, d2_first)

## Run each model
m1_sum <- sampleSelection::selection(selection = ambitionseen ~ copenhagen_ghgtarget + sum_first,
                    outcome =  indcshare_median ~ sum_climate_memberships, 
                    data = df_nocontrols, 
                    method = "ml")
m2_share <- selection(selection = ambitionseen ~ copenhagen_ghgtarget + share_first,
                      outcome =  indcshare_median ~ share_climate_memberships, 
                      data = df_nocontrols, 
                      method = "ml")
m3_idealpoints <- selection(selection = ambitionseen ~ copenhagen_ghgtarget + d1_first + d2_first,
                            outcome =  indcshare_median ~ d1_mean + d2_mean, 
                            data = df_nocontrols, 
                            method = "ml")

## Add robust standard errors and tidy and model
m1_sum_robust <- as.data.frame(coeftest(m1_sum, vcov = sandwich)[,1:4]) 
m1_sum_robust <- m1_sum_robust %>%
  select(estimate = 1, std_error = 2, t_stat = 3, p_value = 4) %>%
  mutate(term = row.names(.)) %>%
  mutate(stage = c(rep("Selection", times = length(m1_sum$param$index$betaS)),
                   rep("Outcome", times = length(c(m1_sum$param$index$betaO, m1_sum$param$index$errTerms))))) %>%
  mutate(model = "Sum")

m2_share_robust <- as.data.frame(coeftest(m2_share, vcov = sandwich)[,1:4])
m2_share_robust <- m2_share_robust %>%
  select(estimate = 1, std_error = 2, t_stat = 3, p_value = 4) %>%
  mutate(term = row.names(.)) %>%
  mutate(stage = c(rep("Selection", times = length(m2_share$param$index$betaS)),
                   rep("Outcome", times = length(c(m2_share$param$index$betaO, m2_share$param$index$errTerms))))) %>%
  mutate(model = "Share")

m3_idealpoints_robust <- as.data.frame(coeftest(m3_idealpoints, vcov = sandwich)[,1:4])
m3_idealpoints_robust <- m3_idealpoints_robust %>%
  select(estimate = 1, std_error = 2, t_stat = 3, p_value = 4) %>%
  mutate(term = row.names(.)) %>%
  mutate(stage = c(rep("Selection", times = length(m3_idealpoints$param$index$betaS)),
                   rep("Outcome", times = length(c(m3_idealpoints$param$index$betaO, m3_idealpoints$param$index$errTerms))))) %>%
  mutate(model = "IRT")

## Combine models into a regression table
nocontrols_table <- rbind(m1_sum_robust, m2_share_robust, m3_idealpoints_robust) %>% 
  mutate(estimate = as.character(round(estimate, 3)),
         # estimate = ifelse(p_value < 0.01, paste(estimate, "**", sep = ""),
         #                   ifelse(p_value<0.05, paste(estimate, "*", sep = ""), estimate)),
         std_error = as.character(round(std_error, 3)),
         std_error = paste("(", std_error, ")", sep = "")) %>%
  select(-t_stat, -p_value) %>% 
  unite("spec", term, stage, model, sep = "-") %>% 
  gather(key = key, value = value, estimate:std_error) %>%
  separate(spec, c("term", "model", "stage"), sep = "-") %>%
  unite("spec", stage, model, sep = "-") %>%
  mutate(term = ifelse(.$term == "share_first", "share_climate_memberships", 
                       ifelse(.$term == "sum_first", "sum_climate_memberships",
                              ifelse(.$term == "d1_first", "d1_mean",
                                     ifelse(.$term == "d2_first", "d2_mean",
                                            ifelse(.$term == "X.Intercept.", "Intercept", 
                                                   ifelse( .$term == "X.Intercept..1", "Intercept", term))))))) %>%
  filter(term!="sigma") %>% 
  spread(., spec, value) %>%
  select(term, "Sum-Selection", "Sum-Outcome", 
         "Share-Selection", "Share-Outcome",
         "IRT-Selection", "IRT-Outcome")

## Latex output of final table
table_2_latex <- nocontrols_table %>%
  stargazer::stargazer(., summary = FALSE, rownames = FALSE,
                       out = "table_2.tex",
                       title = "Heckman sample selection models estimating the relationship between participation in climate institutions and states' Paris GHG targets. Outcome variable is the percentage difference between a country's INDC target and the median of their five equity quotas. Robust standard errors in parentheses. Models contain 172 observations, of which 94 are observed and 78 are censored.")

#### Table 3 ####

## Run three regression models (models 4, 5, 6) and save the outputs in a tibble

models_controls <- all_imputations %>%
  mutate(
    ## ---- Model 4: Sum with covariates
    model_4 = data %>%
      # Define the estimating equations
      purrr::map(
        ~ sampleSelection::selection(
          # Selection stage
          selection = ambitionseen ~ copenhagen_ghgtarget +
            sum_first + fh_ipolity2_first + epi_first + elec_renew_first +
            vulnerability_first + ngo_number_first + sum_ieas_first + fossil_rents_first,
          # Outcome stage
          outcome = indcshare_median ~
            sum_climate_memberships + fh_ipolity2 + epi + elec_renew +
            vulnerability + ngo_number + sum_ieas + fossil_rents,
          # Define data as each imputed dataset in the nested tibble
          data = .,
          # Define estimator
          method = "ml")),
    # Add robust standard errors to estimands
    robust_m4 = model_4 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    ## ---- Model 5: Share with covariates
    model_5 = data %>%
      map(
        ~ selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            share_first + fh_ipolity2_first + epi_first + elec_renew_first +
            vulnerability_first + ngo_number_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_median ~ 
            share_climate_memberships + fh_ipolity2 + epi + elec_renew +
            vulnerability + ngo_number + sum_ieas + fossil_rents,
          data = .,
          method = "ml")),
    robust_m5 = model_5 %>%
      map( ~ coeftest(., vcov = sandwich)),
    ## ---- Model 6: Extended covariates
    model_6 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first +
            fh_ipolity2_first + epi_first + vulnerability_first + ngo_number_first,
          outcome = indcshare_median ~
            d1_mean + d2_mean +
            fh_ipolity2 + epi + elec_renew +
            vulnerability + ngo_number + sum_ieas + fossil_rents,
          data = .,
          method = "ml")),
    robust_m6 = model_6 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich))) 

## Extract variable names
m4_var_names <- row.names(models_controls$robust_m4[[1]])
m4_var_names[1] <- "Intercept_first"
m5_var_names <- row.names(models_controls$robust_m5[[1]])
m5_var_names[1] <- "Intercept_first"
m6_var_names <- row.names(models_controls$robust_m6[[1]])
m6_var_names[1] <- "Intercept_first"

##' The next step is to collect the coefficients and standard errors 
##' for the model run on each of the 15 imputed datasets
##' and then meld these coefficients and standard errors 
##' to get our final ouputs

## Model 4: Collect the outputs
m4_outputs <- models_controls %>%
  # Select only the imputed dataset and the robust coefficients and standard errors
  select(m, robust_m4) %>%
  mutate(robust_m4_df = robust_m4 %>% 
           map(~ as.data.frame(.[,1:4]))) %>% 
  select(m, robust_m4_df) %>% 
  # Unnest into one large tibble
  unnest(cols = robust_m4_df) %>% 
  # Add variable names
  mutate(term = row.names(models_controls$robust_m4[[1]])) %>% 
  # Rename variables
  select(m, term, estimate = Estimate, std_error = "Std. Error",
         t_stat = "z value", p_value = "Pr(>|z|)") %>% 
  # Save as data.frame
  as.data.frame() 

## Extract the coefficients to meld
m4_betas <- m4_outputs %>%
  mutate(term = rep(m4_var_names, 15)) %>%
  # Select only the coefficients
  select(m, term, estimate) %>% 
  # Reshape wide
  pivot_wider(names_from = term, 
              values_from = estimate,
              values_fn = list(estimate = sum)) %>% 
  select(-m) %>% 
  # Create a matrix of coefficients for 21 variables across 15 imputations: 15 x 21 
  as.matrix()

## Extract the standard errors the same way
m4_std_errors <- m4_outputs %>%
  mutate(term = rep(m4_var_names, 15)) %>%
  select(m, term, std_error) %>% 
  pivot_wider(names_from = term, 
              values_from = std_error,
              values_fn = list(std_error = sum)) %>% 
  select(-m) %>% 
  as.matrix()

## Meld the coefficients and standard errors to get final estimates for model 4
m4_melded <- Amelia::mi.meld(m4_betas, m4_std_errors) 

## Tidy the model 4 outputs, to be merged later with models 5 and 6 later
m4_columns <- as.data.frame(cbind(t(m4_melded$q.mi),
                                  t(m4_melded$se.mi))) %>% 
  magrittr::set_colnames(c("estimate", "std_error")) %>% 
  mutate(term = rownames(.)) %>% 
  select(term, everything()) %>% 
  mutate(stage = str_ends(.$term, pattern = "_first")) %>% 
  mutate(stage = ifelse(.$stage == TRUE, "First", "Second")) %>% 
  mutate(term = ifelse(.$stage == "First", 
                       str_sub(.$term, start = 1L, end = -7L), .$term)) %>% 
  mutate(stage = ifelse(.$term == "copenhagen_ghgtarget", "First", .$stage)) %>% 
  mutate(term = ifelse(.$term == "sum", "sum_climate_memberships", .$term)) %>%
  mutate(term = ifelse(.$term == "(Intercept)", "Intercept", .$term)) %>%
  pivot_wider(names_from = stage, values_from = estimate:std_error) %>% 
  magrittr::set_colnames(c("term", "m4_estimate_first", "m4_estimate_second",
                           "m4_stderror_first", "m4_stderror_second")) %>% 
  mutate_at(vars(m4_estimate_first:m4_stderror_second), list(~ round(., 3))) %>% 
  pivot_longer(names_to = "param", values_to = "values_now", c(m4_estimate_first:m4_stderror_second)) %>%
  separate(param, into = c("model", "param", "stage"), sep = "_") %>% 
  pivot_wider(values_from = "values_now", names_from = "stage") %>% 
  select(term, param, m4_first = first, m4_second = second)


## Model 5: The same as above, but a different regressor
m5_outputs <- models_controls %>%
  select(m, robust_m5) %>%
  mutate(robust_m5_df = robust_m5 %>% 
           map(~ as.data.frame(.[,1:4]))) %>% 
  select(m, robust_m5_df) %>% 
  unnest(cols = robust_m5_df) %>% 
  mutate(term = row.names(models_controls$robust_m5[[1]])) %>% 
  select(m, term, estimate = Estimate, std_error = "Std. Error",
         t_stat = "z value", p_value = "Pr(>|z|)") %>% 
  as.data.frame() 

m5_betas <- m5_outputs %>%
  mutate(term = rep(m5_var_names, 15)) %>%
  select(m, term, estimate) %>% 
  pivot_wider(names_from = term, 
              values_from = estimate,
              values_fn = list(estimate = sum)) %>% 
  select(-m) %>% 
  as.matrix()

m5_std_errors <- m5_outputs %>%
  mutate(term = rep(m5_var_names, 15)) %>%
  select(m, term, std_error) %>% 
  pivot_wider(names_from = term, 
              values_from = std_error,
              values_fn = list(std_error = sum)) %>% 
  select(-m) %>% 
  as.matrix()

m5_melded <- mi.meld(m5_betas, m5_std_errors) 

m5_columns <- as.data.frame(cbind(t(m5_melded$q.mi),
                                  t(m5_melded$se.mi))) %>% 
  magrittr::set_colnames(c("estimate", "std_error")) %>% 
  mutate(term = rownames(.)) %>% 
  select(term, everything()) %>% 
  mutate(stage = str_ends(.$term, pattern = "_first")) %>% 
  mutate(stage = ifelse(.$stage == TRUE, "First", "Second")) %>% 
  mutate(term = ifelse(.$stage == "First", 
                       str_sub(.$term, start = 1L, end = -7L), .$term)) %>% 
  mutate(stage = ifelse(.$term == "copenhagen_ghgtarget", "First", .$stage)) %>% 
  mutate(term = ifelse(.$term == "share", "share_climate_memberships", .$term)) %>%
  mutate(term = ifelse(.$term == "(Intercept)", "Intercept", .$term)) %>%
  pivot_wider(names_from = stage, values_from = estimate:std_error) %>% 
  magrittr::set_colnames(c("term", "m5_estimate_first", "m5_estimate_second",
                           "m5_stderror_first", "m5_stderror_second")) %>% 
  mutate_at(vars(m5_estimate_first:m5_stderror_second), list(~ round(., 3))) %>% 
  pivot_longer(names_to = "param", values_to = "values_now", c(m5_estimate_first:m5_stderror_second)) %>%
  separate(param, into = c("model", "param", "stage"), sep = "_") %>% 
  pivot_wider(values_from = "values_now", names_from = "stage") %>% 
  select(term, param, m5_first = first, m5_second = second)


## Model 6: The same as above, but two new regressors

m6_outputs <- models_controls %>%
  select(m, robust_m6) %>%
  mutate(robust_m6_df = robust_m6 %>% 
           map(~ as.data.frame(.[,1:4]))) %>% 
  select(m, robust_m6_df) %>% 
  unnest(cols = robust_m6_df) %>% 
  mutate(term = row.names(models_controls$robust_m6[[1]])) %>% 
  select(m, term, estimate = Estimate, std_error = "Std. Error",
         t_stat = "z value", p_value = "Pr(>|z|)") %>% 
  as.data.frame() 

m6_betas <- m6_outputs %>%
  mutate(term = rep(m6_var_names, 15)) %>%
  select(m, term, estimate) %>% 
  pivot_wider(names_from = term, 
              values_from = estimate,
              values_fn = list(estimate = sum)) %>% 
  select(-m) %>% 
  as.matrix()

m6_std_errors <- m6_outputs %>%
  mutate(term = rep(m6_var_names, 15)) %>%
  select(m, term, std_error) %>% 
  pivot_wider(names_from = term, 
              values_from = std_error,
              values_fn = list(std_error = sum)) %>% 
  select(-m) %>% 
  as.matrix()

m6_melded <- mi.meld(m6_betas, m6_std_errors) 

m6_columns <- as.data.frame(cbind(t(m6_melded$q.mi),
                                  t(m6_melded$se.mi))) %>% 
  magrittr::set_colnames(c("estimate", "std_error")) %>% 
  mutate(term = rownames(.)) %>% 
  select(term, everything()) %>% 
  mutate(stage = str_ends(.$term, pattern = "_first")) %>% 
  mutate(stage = ifelse(.$stage == TRUE, "First", "Second")) %>% 
  mutate(term = ifelse(.$stage == "First", 
                       str_sub(.$term, start = 1L, end = -7L), .$term)) %>% 
  mutate(stage = ifelse(.$term == "copenhagen_ghgtarget", "First", .$stage)) %>% 
  mutate(term = ifelse(.$term == "d1_first", "d1_mean", .$term)) %>%
  mutate(term = ifelse(.$term == "d2_first", "d2_mean", .$term)) %>%
  mutate(term = ifelse(.$term == "(Intercept)", "Intercept", .$term)) %>%
  pivot_wider(names_from = stage, values_from = estimate:std_error) %>% 
  magrittr::set_colnames(c("term", "m6_estimate_first", "m6_estimate_second",
                           "m6_stderror_first", "m6_stderror_second")) %>% 
  mutate_at(vars(m6_estimate_first:m6_stderror_second), list(~ round(., 3))) %>% 
  pivot_longer(names_to = "param", values_to = "values_now", c(m6_estimate_first:m6_stderror_second)) %>%
  separate(param, into = c("model", "plus", "stage"), sep = "_") %>% 
  pivot_wider(values_from = "values_now", names_from = "stage") %>% 
  select(term, param = plus, m6_first = first, m6_second = second)

## Combine models 4, 5 and 6 into one table

controls_table <- full_join(m4_columns, m5_columns, by = c("term", "param")) %>% 
  full_join(., m6_columns, by = c("term", "param")) %>% 
  mutate_at(vars(m4_first:m6_second), ~sprintf("%.3f", round(.,3))) %>% 
  # mutate(m4_first = ifelse(.$param=="stderror" & !is.na(.$m4_first), 
  #                          paste("(", .$m4_first, ")", sep = ""), .$m4_first)) %>% 
  mutate(m4_first = ifelse(.$param=="stderror" & m4_first!="NA", 
                           paste("(", .$m4_first, ")", sep = ""), .$m4_first)) %>% 
  mutate(m5_first = ifelse(.$param=="stderror" & m5_first!="NA", 
                           paste("(", .$m5_first, ")", sep = ""), .$m5_first)) %>% 
  mutate(m6_first = ifelse(.$param=="stderror" & m6_first!="NA", 
                           paste("(", .$m6_first, ")", sep = ""), .$m6_first)) %>% 
  mutate(m4_second = ifelse(.$param=="stderror" & m4_second!="NA", 
                            paste("(", .$m4_second, ")", sep = ""), .$m4_second)) %>% 
  mutate(m5_second = ifelse(.$param=="stderror" & m5_second!="NA", 
                            paste("(", .$m5_second, ")", sep = ""), .$m5_second)) %>% 
  mutate(m6_second = ifelse(.$param=="stderror" & m6_second!="NA", 
                            paste("(", .$m6_second, ")", sep = ""), .$m6_second))

## A vector to sort terms using
term_order <- cbind.data.frame(term = c("Intercept", 
                                        "copenhagen_ghgtarget", 
                                        "sum_climate_memberships",
                                        "fh_ipolity2", 
                                        "epi",
                                        "elec_renew", 
                                        "vulnerability",
                                        "ngo_number", 
                                        "sum_ieas", 
                                        "fossil_rents", 
                                        "sigma", 
                                        "rho", 
                                        "share_climate_memberships", 
                                        "d1_mean", 
                                        "d2_mean"),
                               term_order = c(15, 13, 1, 11, 9, 
                                              8, 7, 10, 6, 12,
                                              14, 14, 2, 3, 4),
                               param = rep(c("estimate", "stderror"), times = 15))

## Final table with models 4, 5 and 6
table_3_latex <- full_join(term_order, controls_table, by = c("term", "param")) %>% 
  arrange(term_order, param) %>% 
  mutate_all(~(str_replace(., "(NA)", ""))) %>%
  mutate_all(~(str_replace(., "NA", ""))) %>% 
  mutate_all(~(str_replace(., "()", ""))) %>% 
  filter(term!="sigma") %>% 
  select(-term_order, -param) %>% 
  stargazer::stargazer(., summary = FALSE, rownames = FALSE, 
                       out = "table_3.tex",
                       title = "Heckman sample selection models estimating the relationship between participation in climate institutions and states' Paris GHG targets. Outcome variable is the percentage difference between a country's INDC target and the median of their five equity quotas. Covariates are measured in 2007 and missing data is accounted for using multiple imputation. Robust standard errors in parentheses. Models contain 172 observations, of which 94 are observed and 78 are censored.")
savehistory(file = "isq_rep_hist_1.Rhistory")

#### Robustness tests: models in figure 5 ####

## This is run in many steps, but all inside one command: 
# 1. Run the sampleSelection::selection() models using purrr::map()
# 2. Add robust standard errors to the estimates using lmtest::coeftest(., vcov = sandwich)
# 3. Keep only the coefficients and standard errors for ideal point regressors
# 4. Run all models
# 5. Unnest the outputs: coefficients from 18 models on 15 imputed datasets
# 6. Then meld the outputs to get the correct coefficients and standard errors

# ---- Model 3: No covariates
# ---- Model 6: Extended covariates
# ---- Model 7: Signicant covariates
# ---- Model 8: Significant covariates plus GHG emissions
# ---- Model 9: Significant covariates plus GDP per capita
# ---- Model 10: Significant covariates plus GDP per capita squared
# ---- Model 11: Alternative selection indicator (#2)
# ---- Model 12: Alternative selection indicator (#3)
# ---- Model 13: Ambition measured using the mean rather than the median
# ---- Model 14: Ambition measured using CPC rather than the median
# ---- Model 15: Ambition measured using EPC rather than the median
# ---- Model 16: Ambition measured using CAP rather than the median
# ---- Model 17: Ambition measured using GDR rather than the median
# ---- Model 18: Ambition measured using CER rather than the median

models_imputations <- all_imputations %>%
  mutate(
    ## ---- Model 3: No covariates
    model_3 = data %>%
      # Define the estimating equations
      map(
        ~ sampleSelection::selection(
          # Selection stage
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first,
          # Outcome stage
          outcome = indcshare_median ~
            d1_mean + d2_mean,
          # Define data
          data = .,
          # Define estimator
          method = "ml")),
    # Add robust standard errors to estimands
    robust_m3 = model_3 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m3 = robust_m3 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m3 = robust_m3 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 6: Extended covariates
    model_6 = data %>%
      purrr::map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first +
            fh_ipolity2_first + epi_first + vulnerability_first + ngo_number_first,
          outcome = indcshare_median ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first +
            fh_ipolity2_first + epi_first + vulnerability_first + ngo_number_first,
          data = .,
          method = "ml")),
    robust_m6 = model_6 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m6 = robust_m6 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m6 = robust_m6 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 7: Significant covariates
    model_7 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_median ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m7 = model_7 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m7 = robust_m7 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m7 = robust_m7 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 8: Significant covariates plus GHG emissions
    model_8 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            ghg_ex +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_median ~
            d1_mean + d2_mean +
            ghg_ex +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m8 = model_8 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m8 = robust_m8 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m8 = robust_m8 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 9: Significant covariates plus GDP per capita
    model_9 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            gdp_pcmkt +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_median ~
            d1_mean + d2_mean +
            gdp_pcmkt +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m9 = model_9 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m9 = robust_m9 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m9 = robust_m9 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 10: Significant covariates plus GDP per capita squared
    model_10 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            gdp_pcmkt + I(gdp_pcmkt*gdp_pcmkt) +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_median ~
            d1_mean + d2_mean +
            gdp_pcmkt + I(gdp_pcmkt*gdp_pcmkt) +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,

          method = "ml")),
    robust_m10 = model_10 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m10 = robust_m10 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m10 = robust_m10 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 11: Alternative selection indicator (#2)
    model_11 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen2 ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_median ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m11 = model_11 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m11 = robust_m11 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m11 = robust_m11 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 12: Alternative selection indicator (#3)
    model_12 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen3 ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_median ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m12 = model_12 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m12 = robust_m12 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m12 = robust_m12 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 13: Ambition measured using the mean rather than the median
    model_13 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_mean ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m13 = model_13 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m13 = robust_m13 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m13 = robust_m13 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 14: Ambition measured using CPC rather than the median
    model_14 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_cpc ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m14 = model_14 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m14 = robust_m14 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m14 = robust_m14 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 15: Ambition measured using EPC rather than the median
    model_15 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_epc ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m15 = model_15 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m15 = robust_m15 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m15 = robust_m15 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 16: Ambition measured using CAP rather than the median
    model_16 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_cap ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m16 = model_16 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m16 = robust_m16 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m16 = robust_m16 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 17: Ambition measured using GDR rather than the median
    model_17 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_gdr_topcoded ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m17 = model_17 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m17 = robust_m17 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m17 = robust_m17 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 18: Ambition measured using CER rather than the median
    model_18 = data %>%
      map(
        ~ sampleSelection::selection(
          selection = ambitionseen ~ copenhagen_ghgtarget +
            d1_mean_first + d2_mean_first +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          outcome = indcshare_cer ~
            d1_mean + d2_mean +
            elec_renew_first + sum_ieas_first + fossil_rents_first,
          data = .,
          method = "ml")),
    robust_m18 = model_18 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m18 = robust_m18 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m18 = robust_m18 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2]),
    ## ---- Model 19: OLS
    model_19 = data %>%
      # Define the estimating equations
      map(
        ~ lm(indcshare_median ~
               d1_mean + d2_mean +
               elec_renew + sum_ieas + fossil_rents,
             # Define data
             data = .)),
    # Add robust standard errors to estimands
    robust_m19 = model_19 %>%
      map( ~ lmtest::coeftest(., vcov = sandwich)),
    d1_m19 = robust_m19 %>%
      map( ~ .[which(rownames(.) == "d1_mean"), 1:2]),
    d2_m19 = robust_m19 %>%
      map( ~ .[which(rownames(.) == "d2_mean"), 1:2])) %>%
  # Keep only the coefficients and standard errors
  select(m,
         d1_m3,  d2_m3,
         d1_m6,  d2_m6,
         d1_m7,  d2_m7,
         d1_m8,  d2_m8,
         d1_m9,  d2_m9,
         d1_m10, d2_m10,
         d1_m11, d2_m11,
         d1_m12, d2_m12,
         d1_m13, d2_m13,
         d1_m14, d2_m14,
         d1_m15, d2_m15,
         d1_m16, d2_m16,
         d1_m17, d2_m17,
         d1_m18, d2_m18,
         d1_m19, d2_m19) %>%
  # Unnest and transform into a dataframe
  unnest(cols = 2:ncol(.)) %>%
  as.data.frame() %>%
  # Attach labels
  mutate(parameter = rep(c("beta", "se"), times = 15))

## Extract the coefficients
models_imputations %>%
  filter(parameter == "beta") %>%
  select(-m, -parameter) %>%
  as.data.frame() -> just_coefs

## Extract the standard errors
models_imputations %>%
  filter(parameter == "se") %>%
  select(-m, -parameter) %>%
  as.data.frame() -> just_ses

## Meld the outputs
coefs_melded <- mi.meld(just_coefs, just_ses) 

## Tidy the outputs
outcomes_summary <- t(rbind(coefs_melded$q.mi, coefs_melded$se.mi)) %>%
  as.data.frame() %>%
  mutate(model = rownames(.)) %>%
  rename(beta = 1, se = 2) %>%
  mutate(conf_low = beta - 1.96*se,
         conf_high = beta + 1.96*se) %>%
  select(model, beta, se, conf_low, conf_high) %>%
  mutate(p_under_05 = ifelse(sign(conf_low)==sign(conf_high), "*", "")) %>%
  arrange(model) %>% 
  separate(model, into = c("dimension", "model"), sep = "_")
outcomes_summary
savehistory(file = "isq_rep_hist_2.Rhistory")


##' The robustness test with the re-sampled ideal points is a bit different
##' Load in 'idealpoint_chains.csv', which is a large dataframe
##' 190 states x 2 estimates per state x 4951 iterations
##' And reshape

chains <- read_csv("idealpoint_chains.csv") %>% 
  pivot_longer(names_to = "chain", values_to = "estimate", d1_chain_5000:"d2_chain_5e+05") %>% 
  separate(chain, into = c("dimension", "chain", "run"), sep = "_") %>% 
  select(-chain) %>% 
  pivot_wider(names_from = "dimension", values_from = "estimate")

##' It will be too cumbersome to meld the coefficients for all 4951 iterations
##' So we're going to average the imputed data instead
##' And only columns 34--45 have imputed data, so only those need to be averaged

average_imputed_data <-
  cbind.data.frame(imputed_data$imputations$imp1[,1:33],
                   cbind(imputed_data$imputations$imp1[,34:ncol(imputed_data$imputations$imp1)] +
                           imputed_data$imputations$imp2[,34:ncol(imputed_data$imputations$imp2)] +
                           imputed_data$imputations$imp3[,34:ncol(imputed_data$imputations$imp3)] +
                           imputed_data$imputations$imp4[,34:ncol(imputed_data$imputations$imp4)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp5)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp6)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp7)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp8)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp9)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp10)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp11)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp12)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp13)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp14)] +
                           imputed_data$imputations$imp5[,34:ncol(imputed_data$imputations$imp15)])/15)

##' Merge the average imputed data with the chains and
##' tidy this data to match the other analyses
chain_data_for_analysis <- average_imputed_data %>% 
  mutate(fossil_rents = log(1+fossil_rents),
         ghg_ex = log(ghg_ex),
         gdp_mkt = log(gdp_mkt),
         gdp_pcmkt = log(gdp_pcmkt),
         population = log(population),
         vulnerability = 100*vulnerability) %>% 
  filter(year == 2007, # 2007 has the best coverage for missing data
         iso3c != "PRY") %>%  # Paraguay has incredibly bad skewed GHG target
  select(iso3c, indcshare_median, ambitionseen, copenhagen_ghgtarget, 
         elec_renew, sum_ieas, fossil_rents) %>% 
  full_join(., chains, by = "iso3c") %>% 
  filter(run!="230700") %>% 
  filter(!is.na(run))

##' This is the code to run the regression
##' Runs 1000s of regressions and counts up to 500000

draws <- unique(chain_data_for_analysis$run)
d1_beta <- NA
d2_beta <- NA
for (draw in draws){
  model <- sampleSelection::selection(
    selection = ambitionseen ~ copenhagen_ghgtarget + # Selection stage
      d1 + d2 +
      elec_renew + sum_ieas + fossil_rents,
    outcome = indcshare_median ~  # Outcome stage
      d1 + d2 +
      elec_renew + sum_ieas + fossil_rents,
    data = chain_data_for_analysis %>% filter(run == draw), # Define data
    method = "ml")# Define estimator
  robust <- lmtest::coeftest(model, vcov = sandwich)
  d1_model <- robust[9,1]
  d2_model <- robust[10,1]
  d1_beta <- rbind(d1_beta, d1_model)
  d2_beta <- rbind(d2_beta, d2_model)
  print(draw)
}

## Extract IRT 1D median estimate and upper and lower bounds
d1_chain_lower_ci <- quantile(d1_beta, 0.025, na.rm=T) # Lower CI
d1_chain_beta <- quantile(d1_beta, 0.5, na.rm=T) # Median estimate
d1_chain_upper_ci <- quantile(d1_beta, 0.975, na.rm=T) # Upper CI

## Extract IRT 2D median estimate and upper and lower bounds
d2_chain_lower_ci <- quantile(d2_beta, 0.025, na.rm=T) # Lower CI 
d2_chain_beta <- quantile(d2_beta, 0.5, na.rm=T) # Median estimate
d2_chain_upper_ci <- quantile(d2_beta, 0.975, na.rm=T) # Upper CI 

## Create a vector to merge into the above summary table 
m20_d1 <- c("XX. Re-sampled", "m20", "d1", 
            d1_chain_beta, "", 
            d1_chain_lower_ci, d1_chain_upper_ci, "*", "1.2", "Joiners")
m20_d2 <- c("XX. Re-sampled", "m20", "d2", 
            d2_chain_beta, "", 
            d2_chain_lower_ci, d2_chain_upper_ci, "*", "0.8", "Deepeners v. fragmenters")


#### Figure 5: Combine all outputs for robustness plot ####

real_names <- cbind.data.frame(model_name = c("III. No controls", 
                                              "VI. Extended controls", 
                                              "VII. Controls",
                                              "VIII. Controls + \nGHG emissions",
                                              "IX. Controls + \nGDP per capita",
                                              "X. Controls + \nGDP per capita, squared",
                                              "XI. Exclude indexed \nGHG targets",
                                              "XII. Exclude non-GHG \ntargets",
                                              "XIII. Mean equitable quota",
                                              "XIV. CPC quota", 
                                              "XV. EPC quota", 
                                              "XVI. CAP quota", 
                                              "XVII. GDR quota",
                                              "XVIII. CER quota",
                                              "XIX. OLS estimator"), 
                               model_number = c("m3", 
                                                "m6", 
                                                "m7", 
                                                "m8", 
                                                "m9", 
                                                "m10", 
                                                "m11", 
                                                "m12", 
                                                "m13", 
                                                "m14",
                                                "m15", 
                                                "m16", 
                                                "m17", 
                                                "m18",
                                                "m19"))

secret_weapon_table <- full_join(real_names, 
                                 outcomes_summary, 
                                 by = c("model_number" = "model")) %>% 
  mutate(rank_order = rep(seq(from = 16, to = 2, by = -1), each = 2)) %>%
  mutate(rank_order = ifelse(.$dimension == "d1", .$rank_order+0.2, .$rank_order-0.2)) %>%
  mutate(Dimension = ifelse(.$dimension=="d2", 
                            "Deepeners v. fragmenters",
                            "Joiners")) %>% 
  mutate(model_name = as.character(model_name))


secret_weapon_table <- rbind(secret_weapon_table,
                             m20_d1,
                             m20_d2) %>% 
  mutate(rank_order = as.numeric(rank_order)) %>% 
  mutate_at(vars(beta:conf_high), list(~ as.numeric(.))) %>% 
  arrange(-rank_order)

# Extract labels for figure
labels_secret_weapon <- c(unique(secret_weapon_table$model_name[1:30]),
                          "XX. Resampled")

## Make figure 5
figure_5 <- ggplot(secret_weapon_table, aes(beta, rank_order, color = Dimension)) + 
  geom_vline(xintercept = 0, linetype = 2, size = 1.25) +
  scale_color_manual(values=c(rgb(0.19, 0.55, 0.91), "black")) +
  scale_shape_manual(values=c(16, 3)) +
  # scale_linetype_manual(values=c(1, 2)) +
  geom_point(aes(color=Dimension, shape=Dimension), size = 2.75) +
  geom_segment(aes(x=conf_low, xend = conf_high, y = rank_order, yend=rank_order)) +
  coord_cartesian(xlim=c(-60,60)) +
  scale_x_continuous(breaks = seq(-60, 60, 20)) +
  scale_y_continuous(breaks = 1:16, 
                     labels=rev(labels_secret_weapon),
                     position = "right") +
  theme(legend.position = "bottom",
        legend.text=element_text(size=12),
        panel.background = element_rect(fill=NA), 
        panel.grid.major.x = element_line(colour = "grey90"),
        panel.grid.minor.x = element_line(colour = "grey90"),
        panel.grid.major.y = element_line(colour = "grey90"),
        panel.ontop = FALSE,
        text=element_text(size=14),
        axis.line = element_line(size = .5, colour = "black")) +
  labs(x="Coefficient and confidence intervals", 
       y="")+
  guides(col = guide_legend(nrow=2))
ggsave("figure_5.eps", height=8, width=6, units = "in")

#### Figure 4: Marginal effects ####

## Run OLS model for marginal effects
m21 <- all_imputations %>%
  filter(m == "imp10") %>% 
  unnest() %>% 
  filter(iso3c!="PRY") %>%
  lm(indcshare_median ~ d1_mean + d2_mean +
       sum_ieas + elec_renew + fossil_rents, data = .) 

## Get fitted values
preds <- as.data.frame(
  predict(m21, 
          interval="confidence", 
          newdata = data.frame(
            d1_mean = 0,
            d2_mean = seq(-2,2,0.1),
            sum_ieas = mean(m21$model$sum_ieas, na.rm = T),
            elec_renew = mean(m21$model$elec_renew, na.rm = T),
            fossil_rents = mean(m21$model$fossil_rents, na.rm = T))))
## Define range of the predictor 
preds$ideal_point <- seq(-2,2,0.1)

## Plot
figure_4 <- ggplot(preds, aes(ideal_point, fit)) +
  labs(x = "IRT 2D: Fragmenters (low) and deepeners (high)",
       y = "Predicted ambition of Paris target") + 
  geom_ribbon(data = preds, 
              aes(x = ideal_point,
                  ymin = lwr, ymax = upr),
              fill = rgb(0.19, 0.55, 0.91), # myblue
              alpha = 1) +
  geom_line(size = 3) +
  scale_y_continuous(limits = c(-80,55), breaks = seq(-75,50, 25)) +
  theme(legend.position = "none",
        text=element_text(size=18),
        axis.text = element_text(size = rel(1.25)),
        panel.background = element_rect(fill=NA), 
        panel.grid.major.x = element_line(colour = "grey90"),
        panel.grid.major.y = element_line(colour = "grey90"),
        panel.ontop = FALSE,
        axis.line = element_line(size = .5, colour = "black"))
ggsave("figure_4.eps", units = "in", width = 8, height = 6)

## Communicating the effect size in standard deviations

# Standard deviation of the outcome variable
all_imputations %>%
  filter(m == "imp10") %>% 
  unnest(cols = data) %>% 
  filter(iso3c!="PRY") %>% 
  ungroup() %>% 
  select(indcshare_median) %>% 
  summarise(sd = sd(indcshare_median, na.rm=T)) %>% 
  as.numeric()
# 58.32727

# Ideal points are already standardized to have mean of 0 and sd of 1

# Standardize the effect sizes
summary(m21)[4] %>% # Take the model output
  as.data.frame() %>% 
  select(beta = 1, std_error = 2) %>% # Take the beta and standard error
  slice(3) %>% # Keep only the IRT 2D estimates
  # Divide everything by the standard deviation of the outcome variable: 58.3
  mutate(beta_sd = beta/58.32727, 
         std_error_sd = std_error/58.32727,
         conf_low = beta_sd - 1.96*std_error_sd,
         conf_high = beta_sd + 1.96*std_error_sd) %>% 
  select(beta_sd, conf_low, conf_high)
# 0.34 [0.17, 0.52]


#### Figure 3: GHG emissions trajectories ####

## Define graphing parameters
myblue <- rgb(0.19, 0.55, 0.91) 
myred <- rgb(0.97, 0.36, 0.34)
point_exp <- 4
line_exp <- 1.75
text_exp <- 7

## Create new data.frames of the key variables for France and Morocco

france_df <- imputed_data$imputations$imp1 %>% 
  as.data.frame() %>% 
  filter(year<2015 & year>1989) %>% 
  select(iso3c, year, ghg_ex, indc_emissions_2030, ends_with("2030"), quota_median) %>% 
  filter(iso3c=="FRA") %>% 
  mutate_at(vars(ghg_ex:quota_median), ~(./1000))

france_df %>% filter(year==2014) %>% t()

morocco_df <- imputed_data$imputations$imp1 %>% 
  as.data.frame() %>% 
  filter(year<2015 & year>1989) %>% 
  select(iso3c, year, ghg_ex, indc_emissions_2030, ends_with("2030"), quota_median) %>% 
  filter(iso3c=="MAR") %>% 
  mutate_at(vars(ghg_ex:quota_median), ~(./1000))

morocco_df %>% filter(year==2014) %>% t()

## Make Morocco plot
figure_3b <- ggplot(morocco_df, aes(year, ghg_ex)) +
  # Set axis labels
  labs(x="Year", y="", title = "Morocco") +
  # Plot GHG emissions
  geom_line(size=line_exp) +
  # Set x- and y-axis limits
  coord_cartesian(xlim=c(1990,2035), ylim = c(0,200)) +
  # Add a point at the INDC level
  geom_point(aes(x = 2030, y=148.932), 
             color=myblue, 
             size=point_exp) +
  # Add a label
  annotate("text", x= 2030, y = 148.932*0.9, label = "INDC",
           hjust = 1, vjust = 1, colour = myblue, size = text_exp) +
  # Add CER label
  annotate("text", x= 2031, y = 74.86, label ='CER', hjust = 0, colour = "black", size = text_exp) +
  # Add EPC label
  annotate("text", x= 2031, y = 153.66, label ='EPC', hjust = 0, colour = "black", size = text_exp) +
  # Add CAP label
  annotate("text", x= 2031, y = 160.752, label ='CAP', hjust = 0, colour = "black", size = text_exp) +
  # Add CPC label
  annotate("text", x= 2031, y = 174.148, label ='CPC', hjust = 0, colour = "black", vjust = 1, size = text_exp) +
  # Add GDR label
  annotate("text", x= 2031, y = 100.076, label ='GDR', hjust = 0, colour = "black", vjust = 0, size = text_exp) +
  # Connect the dots as a GHG trajectory
  geom_segment(aes(x=2014, xend = 2030, y = 88.1, yend=148.932),
               linetype = 2, color=myblue, size=line_exp) +
  geom_segment(aes(x=2014, xend = 2030, y = 88.1, yend=153.66),
               linetype = 2, color=myred, size=line_exp) +
  # Add the median
  geom_point(aes(x = 2030, y=153.66), color=myred, size = point_exp) +
  annotate("text", x= 2030, y = 153.66*1.051, label ='Median', 
           hjust = 1, vjust = 0, colour = myred, size = text_exp) +
  theme(legend.position = "none", 
        panel.background = element_rect(fill=NA), 
        text=element_text(size=18),
        axis.text = element_text(size = rel(1.25)),
        panel.grid.major.y = element_line(colour = "grey90"),
        panel.ontop = FALSE,
        axis.line = element_line(size = .5, colour = "black"))

## Make France plot
figure_3a <- ggplot(france_df, aes(year, ghg_ex)) +
  geom_line(size=line_exp) +
  coord_cartesian(xlim=c(1990,2035), ylim = c(0,620)) +
  labs(x="Year", y=expression(GHG~emissions~(Mt~CO[2])), title = "France") +
  geom_point(aes(x = 2030, y=377.3), 
             color=myblue, 
             size=point_exp) +
  annotate("text", x= 2030, y = 377.3*1.1, label = "INDC",
           hjust = 1, vjust = 0, colour = myblue, size = text_exp) +
  annotate("text", x= 2031, y = 512.05, label ='CER', hjust = 0, colour = "black", size = text_exp) +
  annotate("text", x= 2031, y = 409.64, label ='EPC', hjust = 0, colour = "black", size = text_exp) +
  annotate("text", x= 2031, y = 204.82, label ='CAP', hjust = 0, colour = "black", size = text_exp) +
  annotate("text", x= 2031, y = 296.45, label ='CPC', hjust = 0, colour = "black", vjust = 1, size = text_exp) +
  annotate("text", x= 2031, y = 301.84, label ='GDR', hjust = 0, colour = "black", vjust = 0, size = text_exp) +
  geom_segment(aes(x=2014, xend = 2030, y = 472, yend=377.3),
               linetype = 2, color=myblue, size=line_exp) +
  geom_segment(aes(x=2014, xend = 2030, y = 472, yend=301.84),
               linetype = 2, color=myred, size=line_exp) +
  geom_point(aes(x = 2030, y=301.84), color=myred, size = point_exp) +
  annotate("text", x= 2030, y = 301.84*0.9, label ='Median', 
           hjust = 1, vjust = 1, colour = myred, size = text_exp) +
  theme(legend.position = "none", 
        panel.background = element_rect(fill=NA), 
        text=element_text(size=18),
        axis.text = element_text(size = rel(1.25)),
        panel.grid.major.y = element_line(colour = "grey90"),
        panel.ontop = FALSE,
        axis.line = element_line(size = .5, colour = "black"))

## Combine both plots and save 
figure_3 <- gridExtra::grid.arrange(figure_3a, figure_3b,
                                    nrow = 1, ncol=2,
                                    bottom = "",
                                    left = "")
ggsave("figure_3.eps", figure_3, width = 16, height = 6, units = "in")


#### Figure 2: IRT membership predictions for two institutions ####

gra_preds <- read_csv("gra_predictions.csv")
cslf_preds <- read_csv("cslf_predictions.csv")

#   |institution | alpha| b1_mean| b2_mean|  intercept|       slope|
#   |:-----------|-----:|-------:|-------:|----------:|-----------:|
#   |cslf        | 3.117|   2.239|  -0.042| -74.214286| -53.3095238|
#   |graagg      | 1.354|   0.862|   1.184|   1.143581|   0.7280405|

# Define some colours
myblue <- rgb(0.19, 0.55, 0.91) 
myred <- rgb(0.97, 0.36, 0.34)

## Global Research Alliance on Agricultutral Greenhouse Gases

figure_2a <- ggplot(gra_preds, aes(d1_mean, d2_mean, color=Prediction)) +
  geom_abline(intercept = 1.143581, slope = -0.7280405, linetype=2, size =2) + 
  geom_point(aes(shape=Prediction, size = Prediction)) +
  scale_color_manual(values=c(myred, myred, myblue, myblue)) +
  scale_size_manual(values=c(4, 2.75, 2.75, 4)) +
  scale_shape_manual(values=c(3, 1, 1, 3)) +
  labs(title="Global Research Alliance on \nAgricultural Greenhouse Gases",
       x="Propensity to participate (IRT 1D)",
       y="Pro-climate action (IRT 2D)",
       legend="") +
  scale_y_continuous(limits = c(-2.5,2.5)) +
  scale_x_continuous(limits=c(-2.5,2.5)) +
  geom_abline(intercept = 0, slope = -1, color="gray80") +
  theme(legend.position = "bottom",
        legend.text=element_text(size=10),
        panel.background = element_rect(fill=NA),
        text=element_text(size=18),
        axis.text = element_text(size = rel(1.25)),
        panel.grid.major = element_line(colour = "grey80"),
        panel.ontop = FALSE,
        axis.line = element_line(size = .5, colour = "black")) +
  guides(col = guide_legend(nrow = 2)) +
  annotate("text", x= 2.25, y = 2, hjust = 1,
           label ='Predicted members',
           colour = "black", size = 5) +
  annotate("text", x= 2.25, y = -2, hjust = 1,
           label ='Predicted non-members',
           colour = "black", size = 5)

## Carbon Sequestration Leadership Forum
figure_2b <- ggplot(cslf_preds, aes(d1_mean, d2_mean, color=Prediction)) +
  geom_abline(intercept = -74.214286, slope = 53.3095238, linetype=2, size = 2) + # CSLF
  geom_point(aes(shape=Prediction, size = Prediction)) +
  scale_color_manual(values=c(myred, myred, myblue, myblue)) +
  scale_size_manual(values=c(4, 2.75, 2.75, 4)) +
  scale_shape_manual(values=c(3, 1, 1, 3)) +
  geom_abline(intercept = 0, slope = -1, color="gray80") +
  labs(title="Carbon Sequestration \nLeadership Forum",
       x="Propensity to participate (IRT 1D)",
       y="Pro-climate action (IRT 2D)",
       legend="") +
  scale_y_continuous(limits = c(-2.5,2.5)) +
  scale_x_continuous(limits=c(-2.5,2.5)) +
  theme(legend.position = "bottom",
        legend.text=element_text(size=10),
        panel.background = element_rect(fill=NA),
        axis.text = element_text(size = rel(1.25)),
        panel.grid.major = element_line(colour = "grey80"),
        panel.ontop = FALSE,
        text=element_text(size=18),
        # text=element_text(size=18, family = "LM Sans 10"), # LM Sans 10
        axis.line = element_line(size = .5, colour = "black")) +
  guides(col = guide_legend(nrow = 2)) +
  annotate("text", x= 2, y = 0, hjust = 0,
           label ='Predicted \nmembers',
           colour = "black", size = 5) +
  annotate("text", x= -1, y = -2, hjust = 0,
           label ='Predicted non-members',
           colour = "black", size = 5)

## Combine both plots and save 
figure_2 <- grid.arrange(figure_2a, figure_2b,
                                    nrow = 1, ncol=2,
                                    bottom = "",
                                    left = "")
ggsave("figure_2.eps", figure_2, width = 16, height = 8, units = "in")


savehistory(file = "rowan_isq_2020_replication_log.Rhistory")



#### Figure 1: Climate institutions growth over time and scatterplot of membership ####

gcg <- read_csv("final_gcg_list.csv")

## Table APP-1: List of climate institutions

gcg %>% 
  select(fullname, created, members) %>% 
  knitr::kable(.,
               format = "latex",
               linesep="") 

## Reshape data for count of GCGs over time
figure_1a <- gcg %>%
  mutate(index = 1) %>% 
  group_by(created) %>% 
  summarise(count = sum(index, na.rm=T)) %>% 
  # ungroup() %>% 
  mutate(cumulative = cumsum(count)) %>% 
  select(year = created, cumulative) %>% 
  ggplot(., aes(year, cumulative)) +
  geom_line(size = 2) +
  labs(title="",
       x="Year",
       y="Cumulative climate institutions",
       legend="") +
  scale_y_continuous(limits = c(0,65)) +
  scale_x_continuous(limits=c(1990,2015)) +
  theme(legend.position = "none",
        legend.text=element_text(size=10),
        panel.background = element_rect(fill=NA),
        text=element_text(size=18),
        axis.text = element_text(size = rel(1.25)),
        panel.ontop = FALSE,
        axis.line = element_line(size = .5, colour = "black"))


# Labels for scatterplot
gcg$label <- ifelse(
  gcg$j=="unfccc", "UNFCCC", ifelse(
    gcg$j=="kyoto", "Kyoto Protocol", ifelse(
      gcg$j=="cdm", "CDM", ifelse(
        gcg$j=="ggfrp", "GGFRP", ifelse(
          gcg$j=="irena", "IRENA", ifelse(
            gcg$j=="cem", "CEM", ifelse(
              gcg$j=="petersberg", "Petersberg", ifelse(
                gcg$j=="pmr", "PMR", ifelse(
                  gcg$j=="ccac", "CCAC", ifelse(
                    gcg$j=="doha", "Doha Amendment", NA))))))))))
gcg$label <- as.character(gcg$label)

## Make scatterplot for figure 1B
figure_1b <- ggplot(gcg, aes(created, members, label = label)) + 
  geom_point(size = 3) +
  geom_text(aes(created, members + 2, label = label), 
            vjust = 0,
            size = 5) +
  labs(title="",
       x="Year",
       y="Maximum number of members",
       legend="") +
  scale_y_continuous(limits = c(0,200)) +
  scale_x_continuous(limits=c(1990,2015)) +
  theme(legend.position = "none",
        legend.text=element_text(size=10),
        panel.background = element_rect(fill=NA),
        text=element_text(size=18),
        axis.text = element_text(size = rel(1.25)),
        panel.ontop = FALSE,
        axis.line = element_line(size = .5, colour = "black"))

## Combine both plots and save 
figure_1 <- grid.arrange(figure_1a, figure_1b,
                         nrow = 1, ncol=2,
                         bottom = "",
                         left = "")
ggsave("figure_1.eps", figure_1, width = 16, height = 6, units = "in")

savehistory(file = "isq_rep_hist_3.Rhistory")
